Amanda Birmingham, CCBB, UCSD (abirmingham@ucsd.edu)
To run this notebook reproducibly, follow these steps:
In [ ]:
# Notebook parameters: edit the values in this cell before running the rest.
# Worker count handed to the parallel fastq-processing call later in the notebook.
g_num_processors = 3
# Directory searched for the trimmed input fastq files (a later cell passes it through expand_path).
g_trimmed_fastqs_dir = '~/dual_crispr/test_data/test_set_2'
# Directory handed to the length filter as its last argument; a later cell calls verify_or_make_dir on it.
g_filtered_fastqs_dir = '~/dual_crispr/test_outputs/test_set_2'
# The three values below are forwarded, in this order, to fltr.filter_pair_by_len.
# presumably the inclusive bounds on acceptable trimmed gRNA read length — TODO confirm in count_filterer
g_min_trimmed_grna_len = 19
g_max_trimmed_grna_len = 21
# presumably the number of bases of each read retained for matching — TODO confirm in count_filterer
g_len_of_seq_to_match = 19
In [ ]:
import inspect
import ccbb_pyutils.analysis_run_prefixes as ns_runs
import ccbb_pyutils.files_and_paths as ns_files
import ccbb_pyutils.notebook_logging as ns_logs
def describe_var_list(input_var_name_list):
    """Return a "name: value" line for each named module-level variable.

    Args:
        input_var_name_list: Iterable of strings, each a variable name (or
            any expression) evaluated against this module's global namespace.

    Returns:
        A single string with one newline-terminated "<name>: <value>" line
        per entry, in input order; the empty string for empty input.

    Raises:
        NameError: If an entry refers to a name not defined at module level.
    """
    # Evaluate against globals() explicitly. With eval's default scope the
    # loop variable below shadows a global of the same name, so asking for
    # "name" would echo the string itself instead of the variable's value.
    # NOTE: eval executes arbitrary expressions; pass only trusted literals.
    module_namespace = globals()
    description_list = ["{0}: {1}\n".format(name, eval(name, module_namespace))
                        for name in input_var_name_list]
    return "".join(description_list)
# Set up logging for this notebook session via the ccbb_pyutils helper.
# presumably routes INFO-level messages to stdout — confirm in notebook_logging
ns_logs.set_stdout_info_logger()
In [ ]:
# Replace the configured directory strings with their expanded forms before any file work.
g_trimmed_fastqs_dir = ns_files.expand_path(g_trimmed_fastqs_dir)
# check_or_set receives the filtered dir first and the trimmed dir second;
# presumably it falls back to the trimmed dir when no filtered dir is set — TODO confirm in analysis_run_prefixes
g_filtered_fastqs_dir = ns_files.expand_path(ns_runs.check_or_set(g_filtered_fastqs_dir, g_trimmed_fastqs_dir))
# Echo the resolved values into the notebook output.
print(describe_var_list(['g_trimmed_fastqs_dir', 'g_filtered_fastqs_dir']))
# presumably creates the output directory when it does not already exist — confirm in files_and_paths
ns_files.verify_or_make_dir(g_filtered_fastqs_dir)
In [ ]:
# Import the scaffold-trimming module and print its full source into the notebook output.
import dual_crispr.scaffold_trim as trim
print(inspect.getsource(trim))
In [ ]:
# Import the filtering module and print its full source into the notebook output.
import dual_crispr.count_filterer as fltr
print(inspect.getsource(fltr))
In [ ]:
# Run the length filter over the trimmed fastq files and keep the results for reporting.
# Arguments, in order: the input directory, the file suffix produced by FIVE_THREE trimming,
# the worker count, the per-pair function, and a list of extra values for that function.
# presumably the list is appended to each call of fltr.filter_pair_by_len — TODO confirm in parallel_process_fastqs
import ccbb_pyutils.parallel_process_fastqs as ns_parallel
g_parallel_results = ns_parallel.parallel_process_paired_reads(g_trimmed_fastqs_dir,
trim.get_trimmed_suffix(trim.TrimType.FIVE_THREE), g_num_processors,
fltr.filter_pair_by_len, [g_min_trimmed_grna_len, g_max_trimmed_grna_len,
g_len_of_seq_to_match, g_filtered_fastqs_dir])
In [ ]:
# Print the combined output of the parallel filtering run stored in g_parallel_results.
print(ns_parallel.concatenate_parallel_results(g_parallel_results))
In [ ]:
# Print the result of a file-presence check, using the given message on failure.
# NOTE(review): this inspects g_trimmed_fastqs_dir with the *trimmed* suffix, i.e. the filter's inputs,
# while the failure message is about filtered output. It looks like it should check
# g_filtered_fastqs_dir with the filtered-file suffix instead — confirm against count_filterer.
print(ns_files.check_file_presence(g_trimmed_fastqs_dir, "", trim.get_trimmed_suffix(trim.TrimType.FIVE_THREE),
check_failure_msg="Construct filtering failed to produce filtered file(s)."))